--- title: "RC17 & GFP32 Integration" author: "Nina-Lydia Kazakou" date: "19/03/2022" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Set-up ### Load libraries ```{r message=FALSE, warning=FALSE} library(Seurat) library(ggplot2) library(ggsci) library(here) ``` ### Colour Palette ```{r load_palette} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3<-pal_lancet("lanonc", alpha = 0.7)(9) mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4) ``` ### Load objects ```{r} RC17 <- readRDS(here("data", "Processed", "AllCells", "RC17_AllCells_srt.rds")) ``` ```{r eval=FALSE} GFP32 <- readRDS(here("data", "Processed", "Original_Objects", "GFP32_annotated_srt.rds")) ``` #### Polish GFP32 metadata and save object ```{r eval=FALSE} metadata.filt <- GFP32@meta.data metadata.filt$Cells <- rownames(metadata.filt) metadata.filt$Sample <- NA t1 <- grepl("^preOL", metadata.filt$Cells) t2 <- grepl("^earlyOL_", metadata.filt$Cells) t3 <- grepl("^lateOL_", metadata.filt$Cells) metadata.filt$Sample[which(t1)] <- "wk1" metadata.filt$Sample[which(t2)] <- "wk2" metadata.filt$Sample[which(t3)] <- "wk3" GFP32@meta.data <- metadata.filt GFP32@meta.data$dataset <- NULL GFP32@meta.data$Dataset <- "GFP32_Monolayer_AllCells" GFP32@meta.data$seurat_clusters <- NULL GFP32[["percent.ribo"]] <- PercentageFeatureSet(GFP32, pattern = "^RP[SL]") GFP32@meta.data$ClusterID <- GFP32@meta.data$Cluster_id GFP32@meta.data$Cluster_id <- NULL current.ids <- c("RADIAL GLIA", "GLIA PROGENITOR", "ATROCYTE 1", "OPC 1", "NEURON 1", "OLIGO 2", "CYCLING CELLS 1", "NEURON 2", "OPC 2", "PRE-OPC", "ASTROCYTE 2", "OLIGO 1", "OLIGO 3", "CYCLING CELLS 2", "PERICYTE") new.ids <- c("Radial_Glia", "Glial_Progenitors", "Astrocytes_1", "OPCs_1", "Neurons_1", "Oligos_2", "CyC_1", "Neurons_2", "OPCs_2", "pre-OPCs", "Astrocytes_2", "Oligos_1", "Oligos_3", "CyC_2", "Pericytes") GFP32@meta.data$ClusterID <- plyr::mapvalues(x = GFP32@meta.data$ClusterID, from = current.ids, to = new.ids) GFP32 <- CellCycleScoring(object = GFP32, g2m.features = cc.genes$g2m.genes,s.features = cc.genes$s.genes) head(GFP32@meta.data, 2) ``` ```{r} GFP32 <- readRDS(here("data", "Processed", "GFP32", "GFP32_AllCells_srt.rds")) ``` ```{r eval=FALSE} dir.create(here("data", "Processed", "GFP32")) saveRDS(GFP32, here("data", "Processed", "GFP32", "GFP32_AllCells_srt.rds")) ``` ```{r Pre-process_metadata} GFP32@meta.data$GFP32_ClusterID <- GFP32@meta.data$ClusterID GFP32@meta.data$Dataset_ClusterID <- GFP32@meta.data$ClusterID old <- c("Radial_Glia", "Glial_Progenitors", "Astrocytes_1", "OPCs_1", "Neurons_1", "Oligos_2", "CyC_1", "Neurons_2", "OPCs_2", "pre-OPCs", "Astrocytes_2", "Oligos_1", "Oligos_3", "CyC_2", "Pericytes") new <- c("GFP32_Radial_Glia", "GFP32_Glial_Progenitors", "GFP32_Astrocytes_1", "GFP32_OPCs_1", "GFP32_Neurons_1", "GFP32_Oligos_2", "GFP32_CyC_1", "GFP32_Neurons_2", "GFP32_OPCs_2", "GFP32_pre-OPCs", "GFP32_Astrocytes_2", "GFP32_Oligos_1", "GFP32_Oligos_3", "GFP32_CyC_2", "GFP32_Pericytes") GFP32@meta.data$Dataset_ClusterID <- plyr::mapvalues(x = GFP32@meta.data$Dataset_ClusterID, from = old, to = new) GFP32@meta.data$ClusterID <- NULL RC17@meta.data$RC17_ClusterID <- RC17@meta.data$ClusterID RC17@meta.data$Dataset_ClusterID <- RC17@meta.data$ClusterID old.id <- c("Radial_Glia_1", "Radial_Glia/Pre-OPCs_1", "Astrocytes_1","Astrocytes_2", "Oligodendroglia_1", "Unknown", "Radial_Glia_2", "Outer-Radial_Glia_1", "Outer-Radial_Glia_2", "Radial_Glia/Pre-OPCs_2", "Neurons_1", "Neurons_2", "Oligodendroglia_2", "Radial_Glia_3", "Oligodendroglia_3", "Neurons_3", "Oligodendroglia_4", "Oligodendroglia_5", "Oligodendroglia_6", "Pericytes", "Outer-Radial_Glia_3") new.id <-c("RC17_Radial_Glia_1", "RC17_Radial_Glia/Pre-OPCs_1", "RC17_Astrocytes_1","RC17_Astrocytes_2", "RC17_Oligodendroglia_1", "RC17_Unknown", "RC17_Radial_Glia_2", "RC17_Outer-Radial_Glia_1", "RC17_Outer-Radial_Glia_2", "RC17_Radial_Glia/Pre-OPCs_2", "RC17_Neurons_1", "RC17_Neurons_2", "RC17_Oligodendroglia_2", "RC17_Radial_Glia_3", "RC17_Oligodendroglia_3", "RC17_Neurons_3", "RC17_Oligodendroglia_4", "RC17_Oligodendroglia_5", "RC17_Oligodendroglia_6", "RC17_Pericytes", "RC17_Outer-Radial_Glia_3") RC17@meta.data$Dataset_ClusterID <- plyr::mapvalues(x = RC17@meta.data$Dataset_ClusterID, from = old.id, to = new.id) RC17@meta.data$ClusterID <- NULL ``` # Integration for RC17 & GFP32 ## CCA ```{r CCA} ol_anchors <- FindIntegrationAnchors(object.list = list(GFP32, RC17), dims = 1:20) ol_comb <- IntegrateData(anchorset = ol_anchors, dims = 1:20) DefaultAssay(ol_comb) <- "integrated" ``` # Run the standard workflow for visualization and clustering ```{r Clustering_Integrated_srt} all_genes <- rownames(ol_comb) ol_comb <- ScaleData(ol_comb, features = all_genes) ol_comb <- RunPCA(ol_comb, npcs = 30, verbose = FALSE) # t-SNE and Clustering ol_comb <- RunUMAP(ol_comb, reduction = "pca", dims = 1:20) ol_comb <- FindNeighbors(ol_comb, reduction = "pca", dims = 1:20) ol_comb <- FindClusters(ol_comb, resolution = 0.5) ``` ```{r eval=FALSE} dir.create(here("data", "Processed", "Integrations", "GFP32_RC17")) saveRDS(ol_comb, here("data", "Processed", "Integrations", "GFP32_RC17", "GFP32_RC17_AllCells_comb.rds")) ``` # Visualization ```{r} DimPlot(ol_comb, group.by = "Dataset", pt.size = 0.8, cols = c("hotpink", mycoloursP[17])) ``` ```{r fig.width=8} DimPlot(ol_comb, group.by = "Dataset", split.by = "Dataset", pt.size = 0.8, cols = c("hotpink", mycoloursP[17])) & NoLegend() ``` ```{r} DimPlot(ol_comb, group.by = "orig.ident", pt.size = 0.8, cols = c(mycoloursP[10:13], mycoloursP[5:7])) ``` ```{r fig.height=3, fig.width=12} DimPlot(ol_comb, group.by = "orig.ident", split.by = "orig.ident", pt.size = 0.8, cols = c(mycoloursP[10:13], mycoloursP[5:7])) & NoLegend() ``` ```{r fig.height=3, fig.width=8} DimPlot(ol_comb, group.by = "orig.ident", split.by = "Sample", pt.size = 0.8, cols = c(mycoloursP[10:13], mycoloursP[5:7])) ``` ```{r fig.height=3, fig.width=8} DimPlot(ol_comb, group.by = "Dataset", split.by = "Phase", pt.size = 0.8, cols = c("hotpink", mycoloursP[17])) ``` ```{r fig.height=5, fig.width=12} DimPlot(ol_comb, group.by = "Dataset_ClusterID", split.by = "Dataset", pt.size = 0.8, cols = mycoloursP) ``` ```{r fig.height=10, fig.width=10, message=FALSE, warning=FALSE} DimPlot(ol_comb, group.by = "RC17_ClusterID", split.by = "Dataset", pt.size = 0.8, cols = mycoloursP, label = TRUE, label.size = 3) & NoLegend() DimPlot(ol_comb, group.by = "GFP32_ClusterID", split.by = "Dataset", pt.size = 0.8, cols = mycoloursP, label = TRUE, label.size = 3) & NoLegend() ``` ```{r} rm(GFP32, RC17, ol_comb, ol_anchors) ``` ## Label Transfer for Oligodendroglia (from RC17 to GFP32) ```{r} RC17_ol <- readRDS(here("data", "Processed", "Oligodendroglia", "hES-derived_monolayerOL.rds")) GFP32 <- readRDS(here("data", "Processed", "GFP32", "GFP32_AllCells_srt.rds")) Idents(GFP32) <- "ClusterID" GFP32_ol <- subset(GFP32, idents = c("Oligos_1", "Oligos_2", "Oligos_3", "OPCs_1", "OPCs_2", "pre-OPCs")) ``` ```{r} lt_anchors <- FindTransferAnchors(reference = RC17_ol , query = GFP32_ol, dims = 1:30) predictions <- TransferData(anchorset = lt_anchors, refdata = RC17_ol@meta.data$ClusterID, dims = 1:30) lt_GFP32_ol <- AddMetaData(GFP32_ol, metadata = predictions) ``` ```{r} DimPlot(lt_GFP32_ol, group.by = "ClusterID" , cols = mycoloursP[10:40], label = TRUE) DimPlot(lt_GFP32_ol, group.by = "predicted.id", cols = mycoloursP[11:40] ) DimPlot(lt_GFP32_ol, group.by = "predicted.id", cols = mycoloursP[11:40], label = "TRUE", label.size = 3) & NoLegend() ``` ```{r fig.height=10, fig.width=8} DimPlot(lt_GFP32_ol, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[11:40], ncol = 2) & NoLegend() ``` ### Correlation between ol ID and predicted ID ```{r fig.height=8, fig.width=6} library(corrplot) met_dat <- lt_GFP32_ol@meta.data cor_df <- data.frame(orig_id = met_dat$ClusterID, pred_id = met_dat$predicted.id) cor_df$pred_id <- as.factor(cor_df$pred_id) cor_df$pred_id <- factor(cor_df$pred_id, levels = c("OPC_1", "pri-OPC", "OPC_2", "MAO", "OAPC", "OLIGO_1", "CyP_1", "Cyp_2", "OLIGO_2", "OPC_3", "SPARCL1+ OLIGO_3")) cor_matr <- matrix(data= 0, nrow = length(levels(cor_df$orig_id)), ncol = length(levels(cor_df$pred_id))) rownames(cor_matr) <- levels(cor_df$orig_id) colnames(cor_matr) <- levels(cor_df$pred_id) for(i in 1: nrow(cor_df)){ name1 <- as.character(cor_df[i, 1]) name2 <- as.character(cor_df[i, 2]) cor_matr[name1, name2] <- cor_matr[name1, name2] + 1 } max(cor_matr) centr_cor_matr <- cor_matr/ max(cor_matr) corrplot(centr_cor_matr) ``` ## Label Transfer for Oligodendroglia (from GFP32 to RC17) ```{r} lt_anchors <- FindTransferAnchors(reference = GFP32_ol , query = RC17_ol, dims = 1:30) predictions <- TransferData(anchorset = lt_anchors, refdata = GFP32_ol@meta.data$ClusterID, dims = 1:30) lt_RC17_ol <- AddMetaData(RC17_ol, metadata = predictions) ``` ```{r} DimPlot(lt_RC17_ol, group.by = "ClusterID" , cols = mycoloursP[10:40], label = TRUE) DimPlot(lt_RC17_ol, group.by = "predicted.id", cols = mycoloursP[11:40] ) DimPlot(lt_RC17_ol, group.by = "predicted.id", cols = mycoloursP[11:40], label = "TRUE", label.size = 3) & NoLegend() ``` ```{r fig.height=8, fig.width=8} DimPlot(lt_RC17_ol, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[11:40], ncol = 2) & NoLegend() ``` ### Correlation between ol ID and predicted ID ```{r fig.height=5, fig.width=6} met_dat <- lt_RC17_ol@meta.data cor_df <- data.frame(RC17_ol_id = met_dat$ClusterID, nature_id = met_dat$predicted.id) cor_df$nature_id <- as.factor(cor_df$nature_id) cor_df$nature_id <- factor(cor_df$nature_id, levels = c("pre-OPCs", "OPCs_1", "OPCs_2", "Oligos_1", "Oligos_2", "Oligos_3")) cor_matr <- matrix(data= 0, nrow = length(levels(cor_df$RC17_ol_id)), ncol = length(levels(cor_df$nature_id))) rownames(cor_matr) <- levels(cor_df$RC17_ol_id) colnames(cor_matr) <- levels(cor_df$nature_id) for(i in 1: nrow(cor_df)){ name1 <- as.character(cor_df[i, 1]) name2 <- as.character(cor_df[i, 2]) cor_matr[name1, name2] <- cor_matr[name1, name2] + 1 } max(cor_matr) centr_cor_matr <- cor_matr/ max(cor_matr) corrplot(centr_cor_matr) ``` ### Correct counts not for largest total counts, but first for cluster size ```{r} clu_counts <- table(cor_counts = lt_RC17_ol@meta.data$ClusterID) resiz_mat <- cor_matr for(i in length(rownames(resiz_mat))){ resiz_mat[rownames(resiz_mat)[i], ] <- resiz_mat["pri-OPC", ] / clu_counts[rownames(resiz_mat)[i]] } # Now bring the data on a scale of 0-1 to enable corrplots centr_resiz_mat <- resiz_mat/ max(resiz_mat) ``` ```{r fig.height=5, fig.width=6, message=FALSE, warning=FALSE} library(RColorBrewer) corrplot(centr_resiz_mat, type = "full",method = "circle", col = rep(rev(brewer.pal(n=8, name="RdYlBu")), 2), cl.lim = c(0,1), mar = c(1, 0, 1, 0), tl.col = "black") ``` ```{r} sessionInfo() ```